using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;

namespace Subkismet.Services.GoogleSafeBrowsing
{
    /// <summary>
    /// Manages the lookup process for matching URLs according to the Google spec.
    /// For an incoming URL it produces the host and path variants that should be
    /// checked, with duplicates removed.
    /// </summary>
    internal class Lookup
    {
        /// <summary>Length of the "http://" prefix.</summary>
        private const int HttpPrefixLength = 7;

        /// <summary>Length of the "https://" prefix.</summary>
        private const int HttpsPrefixLength = 8;

        /// <summary>Maximum number of path prefixes produced by <see cref="GetFourPaths"/>.</summary>
        private const int MaxPathPrefixes = 4;

        // Compiled once instead of on every call; the patterns themselves are unchanged.
        private static readonly Regex HostComponentPattern = new Regex(@"[a-zA-Z0-9-]+");
        private static readonly Regex PathComponentPattern = new Regex(@"/[^/]+");

        /// <summary>
        /// Public constructor.
        /// </summary>
        public Lookup()
        {
        }

        /// <summary>
        /// Returns a list of matched URLs for lookup process.
        /// </summary>
        /// <param name="url">String value of the incoming URL.</param>
        /// <returns>
        /// A list of distinct strings, in this order: the exact hostname, the shortened
        /// hostnames, the exact path, the exact path without parameters, and the path prefixes.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
        public List<string> GetUrls(string url)
        {
            if (url == null)
            {
                throw new ArgumentNullException("url");
            }

            List<string> results = new List<string>();

            string hostname = GetExactHostname(url);
            string urlWithoutParameters = GetExactPathWithoutParameters(url);

            results.Add(hostname);
            results.AddRange(GetFourHostNames(hostname));
            results.Add(GetExactPath(url));
            results.Add(urlWithoutParameters);
            results.AddRange(GetFourPaths(hostname, urlWithoutParameters));

            return RemoveDuplicates(results);
        }

        /// <summary>
        /// Removes a leading "http://" or "https://" from the URL, if present.
        /// </summary>
        /// <param name="url">The URL.</param>
        /// <returns>The URL without its scheme prefix, or the input unchanged.</returns>
        private static string StripScheme(string url)
        {
            // Ordinal comparison: a culture-sensitive StartsWith can report a match
            // that is not exactly seven/eight characters long, which would make the
            // fixed-length removal cut at the wrong place.
            if (url.StartsWith("http://", StringComparison.OrdinalIgnoreCase))
            {
                return url.Substring(HttpPrefixLength);
            }

            if (url.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                return url.Substring(HttpsPrefixLength);
            }

            return url;
        }

        /// <summary>
        /// Returns the exact hostname for the URL.
        /// </summary>
        /// <param name="url">URL to get its exact hostname.</param>
        /// <returns>
        /// Everything between the scheme and the first "/", always terminated with "/".
        /// </returns>
        private string GetExactHostname(string url)
        {
            string remainder = StripScheme(url);
            int slashIndex = remainder.IndexOf('/');

            // No slash at all: the whole remainder is treated as the hostname.
            if (slashIndex < 0)
            {
                return remainder + "/";
            }

            return remainder.Substring(0, slashIndex + 1);
        }

        /// <summary>
        /// Returns four hostnames that match the incoming hostname by
        /// removing its components from the left.
        /// </summary>
        /// <param name="hostname">The hostname.</param>
        /// <returns>
        /// A list of up to four hostnames matching the input, shortest first, each
        /// terminated with "/". Empty when the hostname has two components or fewer.
        /// </returns>
        private List<string> GetFourHostNames(string hostname)
        {
            List<string> results = new List<string>();

            // Matches never returns null; a hostname without any matching component
            // simply yields an empty collection.
            MatchCollection components = HostComponentPattern.Matches(hostname);
            int count = components.Count;

            if (count <= 2)
            {
                return results;
            }

            // Start with the last two components and grow leftwards, stopping after
            // four candidates or when the components run out.
            for (int start = count - 2; (start > count - 6) && (start >= 0); start--)
            {
                string[] parts = new string[count - start];

                for (int offset = 0; offset < parts.Length; offset++)
                {
                    parts[offset] = components[start + offset].Value;
                }

                results.Add(String.Join(".", parts) + "/");
            }

            return results;
        }

        /// <summary>
        /// Returns the exact path for the incoming URL.
        /// </summary>
        /// <param name="url">The URL.</param>
        /// <returns>
        /// The URL without its scheme. A trailing "/" is appended when the last "."
        /// of the parameterless URL comes before its last "/".
        /// </returns>
        private string GetExactPath(string url)
        {
            string result = StripScheme(url);
            string exactPathWithoutParameters = GetExactPathWithoutParameters(url);

            int dotIndex = exactPathWithoutParameters.LastIndexOf('.');
            int slashIndex = exactPathWithoutParameters.LastIndexOf('/');

            // NOTE(review): when the URL carries parameters the slash is appended
            // after them; behavior kept as-is, confirm this is intended.
            if ((dotIndex < slashIndex) && !result.EndsWith("/", StringComparison.Ordinal))
            {
                result = result + "/";
            }

            return result;
        }

        /// <summary>
        /// Returns the exact path for a URL without parameters.
        /// </summary>
        /// <param name="url">The URL.</param>
        /// <returns>The URL without its scheme and without anything from the first "?" on.</returns>
        private string GetExactPathWithoutParameters(string url)
        {
            string result = StripScheme(url);

            // The parameters start at the FIRST "?"; cutting at the last one would
            // leave part of the parameters in the result.
            int queryIndex = result.IndexOf('?');

            if (queryIndex >= 0)
            {
                result = result.Remove(queryIndex);
            }

            return result;
        }

        /// <summary>
        /// Returns four paths for the hostname and the URL without parameters.
        /// </summary>
        /// <param name="hostname">The hostname.</param>
        /// <param name="urlWithoutParameters">The URL without parameters.</param>
        /// <returns>
        /// A list of up to four strings for the matched paths, each the hostname
        /// followed by a growing run of leading path components and a trailing "/".
        /// </returns>
        private List<string> GetFourPaths(string hostname, string urlWithoutParameters)
        {
            List<string> results = new List<string>();

            MatchCollection components = PathComponentPattern.Matches(urlWithoutParameters);
            int count = components.Count;

            // Without a trailing slash the last component is not used as a prefix.
            if (!urlWithoutParameters.EndsWith("/", StringComparison.Ordinal))
            {
                count--;
            }

            // Drop the hostname's trailing slash; every component brings its own leading one.
            string host = hostname.EndsWith("/", StringComparison.Ordinal)
                ? hostname.Substring(0, hostname.Length - 1)
                : hostname;

            StringBuilder prefix = new StringBuilder(host);

            for (int index = 0; (index < MaxPathPrefixes) && (index < count); index++)
            {
                prefix.Append(components[index].Value);
                results.Add(prefix.ToString() + "/");
            }

            return results;
        }

        /// <summary>
        /// Removes duplicate items from the list.
        /// </summary>
        /// <param name="results">A list of strings with duplicate items.</param>
        /// <returns>A list of strings without duplicate items, in first-seen order.</returns>
        private List<string> RemoveDuplicates(List<string> results)
        {
            List<string> finalResults = new List<string>(results.Count);

            foreach (string item in results)
            {
                if (!finalResults.Contains(item))
                {
                    finalResults.Add(item);
                }
            }

            return finalResults;
        }
    }
}
